#### Data and Diagnostics ####
# Load packages
library(psych)

# Load Data
mturk <- read.csv("MTurk_Poll_Perception.csv")

# Remove respondents spending less than 2 minutes (120 seconds) on the survey.
# A logical mask is used instead of `-which(...)`: with a negative `which()`
# index, zero matches produce `mturk[integer(0), ]`, which silently drops
# every row. Respondents with a missing duration are kept, as before.
min_duration_sec <- 120
too_fast <- !is.na(mturk$Q_TotalDuration) &
  mturk$Q_TotalDuration < min_duration_sec
mturk <- mturk[!too_fast, ]

# Respondent answered 0 questions.
# NOTE: 224 is a row *position* in the filtered data, not a respondent id, so
# it is only meaningful for the original CSV combined with the filter above.
mturk <- mturk[-224, ]

# Getting vars of interest
# Recoding helpers for this section:
#   na_to_zero()     - replace missing values with 0
#   scored_correct() - 1 when the answer equals the key, 0 otherwise
#                      (a missing answer counts as incorrect)
na_to_zero <- function(x) {
  x[is.na(x)] <- 0
  x
}
scored_correct <- function(answer, key) {
  as.numeric(!is.na(answer) & answer == key)
}

# Pre-treatment support for registration: 1 = support, 2 = oppose, 3 = DK
hist(mturk$muslim_reg_supp)
mturk$support <- as.numeric(mturk$muslim_reg_supp == 1) # support registration
mturk$oppose <- as.numeric(mturk$muslim_reg_supp == 2) # oppose registration

# Treatment flags: a missing flag means "not in this condition"
mturk$ctrl_cond <- na_to_zero(mturk$ctrl_cond)
mturk$supp_cond <- na_to_zero(mturk$supp_cond)
mturk$opp_cond <- na_to_zero(mturk$opp_cond)

# Outcome items
hist(mturk$poll_accurate)
hist(mturk$poll_trust)
hist(mturk$poll_inform)

# Getting covariates of interest
hist(mturk$pol_int)

# Party ID: take pid_d when present, otherwise pid_r, otherwise pid_i
pid_fallback <- ifelse(is.na(mturk$pid_r), mturk$pid_i, mturk$pid_r)
mturk$pid_7 <- ifelse(is.na(mturk$pid_d), pid_fallback, mturk$pid_d)

# Histogram shows the raw coding; the column is then shifted down by one
hist(mturk$gender)
mturk$gender <- mturk$gender - 1

# Ideology: 99 is 'not sure' and is recoded to 3 (one person)
hist(mturk$ideo5)
mturk$ideo5[which(mturk$ideo5 == 99)] <- 3

hist(mturk$education)

mturk$race_1 <- na_to_zero(mturk$race_1) # white
mturk$race_3 <- na_to_zero(mturk$race_3) # middle eastern/north african (n=2)

# Political knowledge items, scored 1 = correct / 0 = incorrect or missing
mturk$know_term_sen <- scored_correct(mturk$know_term_sen, 1) # 1 is correct
mturk$know_pres_veto <- scored_correct(mturk$know_pres_veto, 2) # 2 is correct
mturk$know_scj <- scored_correct(mturk$know_scj, 1) # 1 is correct

# Knowledge index: number of correct answers (0-3)
mturk$knowledge <- with(mturk, know_term_sen + know_scj + know_pres_veto)

#### Manipulation Check ####
# Share of a condition's respondents whose manip_check answer is one of the
# expected values. The denominator is the number of respondents flagged for
# the condition, so missing answers count as failures.
manip_pass_rate <- function(in_cond, expected) {
  answers <- mturk$manip_check[in_cond == 1]
  sum(answers %in% expected) / sum(in_cond)
}

manip_pass_rate(mturk$ctrl_cond, c(1, 3)) # control condition

manip_pass_rate(mturk$supp_cond, 1) # support condition

manip_pass_rate(mturk$opp_cond, 2) # oppose condition


#### Creating Poll Legitimacy Variable ####
# Poll legitimacy is the average over the three measured dimensions
# (accuracy, trust, informativeness). Missing ratings are skipped, so the
# average is taken over whichever dimensions a respondent actually rated
# (only 2 NAs in the data). A respondent who rated no dimension gets
# NaN (0 / 0).
poll_items <- cbind(mturk$poll_accurate, mturk$poll_trust, mturk$poll_inform)
item_rated <- !is.na(poll_items)
n_rated <- rowSums(item_rated) # denominator: number of rated dimensions
rating_total <- rowSums(replace(poll_items, !item_rated, 0))
mturk$poll_legitimacy <- rating_total / n_rated

hist(mturk$poll_legitimacy, xlab = "Poll Legitimacy",
  main = "Poll Legitimacy - MTurk")

# normalize to the 0-1 range.
# na.rm = TRUE is applied to every min()/max(): previously only the numerator
# removed missing values, so a single NA/NaN in poll_legitimacy turned the
# denominator, and therefore the whole normalized column, into NA.
legit_min <- min(mturk$poll_legitimacy, na.rm = TRUE)
legit_max <- max(mturk$poll_legitimacy, na.rm = TRUE)
mturk$poll_legitimacy_norm <-
  (mturk$poll_legitimacy - legit_min) / (legit_max - legit_min)

# reliability scale (Cronbach's alpha over the three items)
poll_df <- data.frame(mturk$poll_accurate, mturk$poll_trust, mturk$poll_inform)
psych::alpha(poll_df)

## Getting conditions from other omnibus survey experiments
# A respondent is coded 1 for a condition when that condition's variable is
# non-missing, and 0 otherwise.
assigned <- function(x) {
  as.numeric(!is.na(x))
}

# J's conditions
summary(mturk$DPledge) # dem no stand pledge
summary(mturk$RPledge) # rep no stand pledge
summary(mturk$NPPledge) # no party no stand pledge

mturk$j_cond1 <- assigned(mturk$RPledge)
mturk$j_cond2 <- assigned(mturk$DPledge)
mturk$j_cond3 <- assigned(mturk$NPPledge)

# H's conditions
summary(mturk$vwsw) # victim white, suspect white
summary(mturk$vwsa)
summary(mturk$vasw) # victim arab, suspect white
summary(mturk$vasa)

mturk$h_cond1 <- assigned(mturk$vwsw)
mturk$h_cond2 <- assigned(mturk$vwsa)
mturk$h_cond3 <- assigned(mturk$vasw)
mturk$h_cond4 <- assigned(mturk$vasa)

## Split dataset on three conditions
# Rows whose condition flag equals 1; a missing flag never selects a row.
rows_in_condition <- function(flag) {
  mturk[!is.na(flag) & flag == 1, , drop = FALSE]
}
support_data <- rows_in_condition(mturk$supp_cond)
oppose_data <- rows_in_condition(mturk$opp_cond)
control_data <- rows_in_condition(mturk$ctrl_cond)

#### Balance Check - Table A1 ####
# Covariates compared across the three treatment arms
balance_covars <- c("gender", "race_1", "education", "pol_int", "pid_7",
  "ideo5", "knowledge")

# Covariate columns of one arm. Columns are named "<label>.<covariate>" and
# rows are renumbered so the result matches a frame built column by column
# with data.frame(<label>$gender, <label>$race_1, ...).
balance_frame <- function(arm, label) {
  covars <- arm[balance_covars]
  names(covars) <- paste(label, balance_covars, sep = ".")
  row.names(covars) <- NULL
  covars
}

# Column means, ignoring missing values
covariate_means <- function(covars) {
  apply(as.matrix(covars), 2, mean, na.rm = TRUE)
}

supp_balance <- balance_frame(support_data, "support_data")
opp_balance <- balance_frame(oppose_data, "oppose_data")
ctrl_balance <- balance_frame(control_data, "control_data")

opp_demograph_means <- covariate_means(opp_balance)
supp_demograph_means <- covariate_means(supp_balance)
ctrl_demograph_means <- covariate_means(ctrl_balance)

opp_demograph_means
supp_demograph_means
ctrl_demograph_means

# one-way anova
# Treatment factor: 1 = control, 2 = support, 3 = oppose, 0 = no flag set.
# The vector is sized with nrow(mturk) rather than the length of an unrelated
# column (V1), and filled with vectorized assignment instead of a
# `1:length()` loop, which would iterate over c(1, 0) on empty input.
# Flags are assigned in reverse priority so that control wins over support,
# which wins over oppose, matching an if / else-if chain in that order.
condition <- rep(0, nrow(mturk))
condition[which(mturk$opp_cond == 1)] <- 3
condition[which(mturk$supp_cond == 1)] <- 2
condition[which(mturk$ctrl_cond == 1)] <- 1
condition <- as.factor(condition)

# [5] keeps only the p-value column (Pr(>F)) of each ANOVA table
anova(lm(mturk$gender ~ condition))[5]
anova(lm(mturk$race_1 ~ condition))[5]
anova(lm(mturk$education ~ condition))[5]
anova(lm(mturk$pol_int ~ condition))[5]
anova(lm(mturk$pid_7 ~ condition))[5]
anova(lm(mturk$ideo5 ~ condition))[5]
anova(lm(mturk$knowledge ~ condition))[5]

## Table A2
# One column per (condition, prior opinion) cell. Within a column the rows are
#   1-3: share of the cell rating the poll >= 4 on accuracy, trust, inform
#   4:   number of respondents in the cell
# Shares divide by every element selected for the cell, so a missing rating
# (or a missing prior-opinion flag) stays in the denominator.
a2_cell <- function(arm, prior) {
  in_cell <- arm[[prior]] == 1
  share_high <- function(item) {
    ratings <- arm[[item]][in_cell]
    length(which(ratings >= 4)) / length(ratings)
  }
  c(
    share_high("poll_accurate"),
    share_high("poll_trust"),
    share_high("poll_inform"),
    length(which(in_cell))
  )
}

a2_values <- c(
  a2_cell(control_data, "support"), # Control Cond, Prior Support
  a2_cell(control_data, "oppose"), # Control Cond, Prior Oppose
  a2_cell(support_data, "support"), # Support Cond, Prior Support
  a2_cell(support_data, "oppose"), # Support Cond, Prior Oppose
  a2_cell(oppose_data, "support"), # Oppose Cond, Prior Support
  a2_cell(oppose_data, "oppose") # Oppose Cond, Prior Oppose
)

round(matrix(a2_values, ncol = 6, nrow = 4, byrow = FALSE), 2)

#### Models ####
# subset out those with no prior opinion
# Keeps respondents who either supported or opposed registration before
# treatment; rows where that test is missing are dropped.
keep_prior_opinion <- function(arm) {
  stated <- arm$support == 1 | arm$oppose == 1
  arm[!is.na(stated) & stated, , drop = FALSE]
}
support_data <- keep_prior_opinion(support_data)
oppose_data <- keep_prior_opinion(oppose_data)
control_data <- keep_prior_opinion(control_data)

## Table A3
# Baseline OLS models, one per treatment arm. The outcome is the
# (un-normalized) poll_legitimacy average; `support` is the prior-opinion
# indicator and the remaining terms are covariates. The data sets used here
# were restricted above to respondents with a prior opinion, so
# support = 0 corresponds to prior opposition.

# support condition
m1.1_nbl <- lm(poll_legitimacy ~ support + knowledge + pol_int + ideo5 +
    pid_7 + education + gender + race_1, data = support_data)

# oppose condition
m1.2_nbl <- lm(poll_legitimacy ~ support + knowledge + pol_int + ideo5 +
    pid_7 + education + gender + race_1, data = oppose_data)

# control condition
m1.3_nbl <- lm(poll_legitimacy ~ support + knowledge + pol_int + ideo5 +
    pid_7 + education + gender + race_1, data = control_data)

summary(m1.1_nbl)
summary(m1.2_nbl)
summary(m1.3_nbl)

#### Figure 2 ####
scalar <- 2.6 # adjust everything for better interpretation of coefficients

# Coefficient plot for the three Table A3 models, drawn as side-by-side
# panels on one x axis. Each panel's zero line sits at its offset (0, 9, 18)
# and every estimate / interval is multiplied by `scalar` before shifting.
fig2_models <- list(m1.1_nbl, m1.2_nbl, m1.3_nbl) # support, oppose, control
fig2_offsets <- c(0, 9, 18)
fig2_terms <- 2:9 # the eight slope coefficients; the intercept is skipped
fig2_rows <- 1:8
fig2_labels <- c("Prior: Support", "Pol. Knowledge", "Pol. Interest",
  "Ideology", "Party ID", "Education", "Female", "White")

par(las = 1)

# Point estimates: the first panel opens the plot, the others are added.
# rev() puts the first model term at the top of the figure.
plot(rev(scalar * coef(fig2_models[[1]])[fig2_terms]), fig2_rows,
  axes = FALSE, ylab = "", xlab = "Coefficient Value", cex = 1.3, pch = 16,
  ylim = c(.5, 8.5), xlim = c(-4, 22))
for (panel in 2:3) {
  points(
    rev(scalar * coef(fig2_models[[panel]])[fig2_terms] + fig2_offsets[panel]),
    fig2_rows, pch = 16, cex = 1.3
  )
}

# Confidence intervals (confint() defaults), one horizontal bar per term
for (panel in 1:3) {
  ci <- scalar * confint(fig2_models[[panel]])[fig2_terms, ] +
    fig2_offsets[panel]
  segments(rev(ci[, 1]), fig2_rows, rev(ci[, 2]), fig2_rows)
}

# x axis: each panel is labelled -3..3 around its own zero line
axis(1, at = c(-3:3, 6:12, 15:21), rep(as.character(-3:3), times = 3))
axis(2, at = fig2_rows, rev(fig2_labels), tick = FALSE, pos = -2.75)

abline(v = fig2_offsets, lty = 2) # zero line of each panel
abline(v = c(4.5, 13.5)) # separators between panels

# NOTE(review): the third panel plots m1.3_nbl, which is fit on control_data,
# but is titled "Close Condition" -- confirm the label is intended.
mtext(c("Support Condition", "Oppose Condition", "Close Condition"),
  side = 3, at = fig2_offsets)

## look at results without ideology and partisanship -- Table A4
# Same outcome and data sets as the Table A3 models, with ideo5 and pid_7
# removed from the right-hand side. One model per treatment arm.

# support condition
m1.1_npi <- lm(poll_legitimacy ~ support + knowledge + pol_int + education +
    gender + race_1, data = support_data)

# oppose condition
m1.2_npi <- lm(poll_legitimacy ~ support + knowledge + pol_int + education +
    gender + race_1, data = oppose_data)

# control condition
m1.3_npi <- lm(poll_legitimacy ~ support + knowledge + pol_int + education +
    gender + race_1, data = control_data)

summary(m1.1_npi)
summary(m1.2_npi)
summary(m1.3_npi)

## look at results without controls -- Table A5
# Bivariate models: poll_legitimacy regressed on the prior-support indicator
# only, fit separately on the support, oppose and control data sets.
m1.1_noc <- lm(poll_legitimacy ~ support, data = support_data)
m1.2_noc <- lm(poll_legitimacy ~ support, data = oppose_data)
m1.3_noc <- lm(poll_legitimacy ~ support, data = control_data)
summary(m1.1_noc)
summary(m1.2_noc)
summary(m1.3_noc)

## look at results while controlling for prior experimental condition -- Table A6
# Table A3 specification plus indicators for the conditions of the two other
# experiments in the omnibus survey (j_cond*, h_cond*). j_cond3 and h_cond4
# are not in the formula, which leaves them as the omitted categories
# (assuming each respondent saw exactly one condition per experiment --
# TODO confirm).

# support condition
m1.1_full <- lm(poll_legitimacy ~ support + knowledge + pol_int + ideo5 + pid_7 +
    education + gender + race_1 + j_cond1 + j_cond2 + h_cond1 + h_cond2 + h_cond3,
  data = support_data)

# oppose condition
m1.2_full <- lm(poll_legitimacy ~ support + knowledge + pol_int + ideo5 + pid_7 +
    education + gender + race_1 + j_cond1 + j_cond2 + h_cond1 + h_cond2 + h_cond3,
  data = oppose_data)

# control condition
m1.3_full <- lm(poll_legitimacy ~ support + knowledge + pol_int + ideo5 + pid_7 +
    education + gender + race_1 + j_cond1 + j_cond2 + h_cond1 + h_cond2 + h_cond3,
  data = control_data)

summary(m1.1_full)
summary(m1.2_full)
summary(m1.3_full)


## look at results while interacting prior experimental condition -- Table A7
# Table A6 specification plus every pairwise interaction between the included
# j_cond indicators (j_cond1, j_cond2) and the included h_cond indicators
# (h_cond1, h_cond2, h_cond3): six interaction terms in total.

# support condition
m1.1_int <- lm(poll_legitimacy ~ support + knowledge + pol_int + ideo5 + pid_7 +
    education + gender + race_1 + j_cond1 + j_cond2 + h_cond1 + h_cond2 + h_cond3 +
    j_cond1:h_cond1 + j_cond1:h_cond2 + j_cond1:h_cond3 + j_cond2:h_cond1 +
    j_cond2:h_cond2 + j_cond2:h_cond3,
  data = support_data)

# oppose condition
m1.2_int <- lm(poll_legitimacy ~ support + knowledge + pol_int + ideo5 + pid_7 +
    education + gender + race_1 + j_cond1 + j_cond2 + h_cond1 + h_cond2 + h_cond3 +
    j_cond1:h_cond1 + j_cond1:h_cond2 + j_cond1:h_cond3 + j_cond2:h_cond1 +
    j_cond2:h_cond2 + j_cond2:h_cond3,
  data = oppose_data)

# control condition
m1.3_int <- lm(poll_legitimacy ~ support + knowledge + pol_int + ideo5 + pid_7 +
    education + gender + race_1 + j_cond1 + j_cond2 + h_cond1 + h_cond2 + h_cond3 +
    j_cond1:h_cond1 + j_cond1:h_cond2 + j_cond1:h_cond3 + j_cond2:h_cond1 +
    j_cond2:h_cond2 + j_cond2:h_cond3,
  data = control_data)

summary(m1.1_int)
summary(m1.2_int)
summary(m1.3_int)

## models where muslim registration support predicted by prior treatment conditions -- Table A8
# Logistic regressions on the full mturk data (not the per-condition
# subsets). The outcome `support` is 1 for prior support and 0 for any other
# non-missing answer, so "don't know" respondents are coded 0 here.

# main effects of the other experiments' condition indicators
m_prior <- glm(support ~ j_cond1 + j_cond2 + h_cond1 + h_cond2 + h_cond3,
  data = mturk, family = binomial(link = 'logit'))
# main effects plus the six j_cond x h_cond interactions
m_prior_int <- glm(support ~ j_cond1 + j_cond2 + h_cond1 + h_cond2 + h_cond3 +
    j_cond1:h_cond1 + j_cond1:h_cond2 + j_cond1:h_cond3 + j_cond2:h_cond1 +
    j_cond2:h_cond2 + j_cond2:h_cond3,
  data = mturk, family = binomial(link = 'logit'))

summary(m_prior)
summary(m_prior_int)

## Descriptive statistics of key IDVs / DVs (rescale from 0-1)
# Mean of `x` after min-max rescaling. The minimum and maximum come from
# `ref`, which defaults to `x` itself, so each per-condition figure is
# rescaled with that subset's own observed range. Missing values are ignored.
rescaled_mean <- function(x, ref = x) {
  lo <- min(ref, na.rm = TRUE)
  hi <- max(ref, na.rm = TRUE)
  mean((x - lo) / (hi - lo), na.rm = TRUE)
}

# Full-sample figures only use respondents with a prior opinion (exclude DKs)
# but are rescaled with the range of the whole sample.
has_prior <- mturk$support == 1 | mturk$oppose == 1

# Four figures for one item: full sample, support, oppose, control
desc_column <- function(item) {
  c(
    rescaled_mean(mturk[[item]][has_prior], ref = mturk[[item]]),
    rescaled_mean(support_data[[item]]),
    rescaled_mean(oppose_data[[item]]),
    rescaled_mean(control_data[[item]])
  )
}

# Share of prior supporters (already 0/1, so no rescaling)
prior_support_share <- c(
  mean(mturk$support[has_prior], na.rm = TRUE),
  mean(support_data$support, na.rm = TRUE),
  mean(oppose_data$support, na.rm = TRUE),
  mean(control_data$support, na.rm = TRUE)
)

desc_stat <- matrix(
  c(
    desc_column("poll_trust"),
    desc_column("poll_accurate"),
    desc_column("poll_inform"),
    prior_support_share
  ),
  ncol = 4, byrow = FALSE
)

desc_stat <- round(desc_stat, 2)
dimnames(desc_stat) <- list(
  c("full", "support", "oppose", "control"),
  c("trust", "accurate", "inform", "support")
)
desc_stat

# credibility desc stats
# Same rescaled means for the poll_legitimacy average, printed one at a time:
# full sample (prior opinion only), support, oppose, control.
rescaled_mean(mturk$poll_legitimacy[has_prior], ref = mturk$poll_legitimacy)
rescaled_mean(support_data$poll_legitimacy)
rescaled_mean(oppose_data$poll_legitimacy)
rescaled_mean(control_data$poll_legitimacy)

#### MTurk Bot Check ####
# check for bots (duplicates in long|lat)
# look for same string for geographic information variable
# look at the debrief box for these responses

# Rows whose latitude AND longitude have each appeared in an earlier row.
# NOTE(review): the two duplicated() tests are independent, so a row is also
# flagged when its latitude repeats one earlier row and its longitude repeats
# a different one. A pairwise test would be stricter, but it would change
# `dupes` and the hard-coded positions used further down.
lat_repeated <- duplicated(mturk$LocationLatitude)
long_repeated <- duplicated(mturk$LocationLongitude)
dupes <- which(lat_repeated & long_repeated)

dupe_long <- mturk$LocationLongitude[dupes]
dupe_lat <- mturk$LocationLatitude[dupes]

# Positions within `dupes` that repeat an earlier flagged row
dupes2 <- which(duplicated(dupe_lat) & duplicated(dupe_long))

which(duplicated(mturk$V6)) #ip address
# comparing above, all contained in first dupe set

# All rows sharing the longitude of the 7th flagged row
probe_long <- mturk$LocationLongitude[dupes[7]]
which(mturk$LocationLongitude == probe_long)
# 7, 24
# 134, 137, 180, 31, 382, 479
suspect_rows <- c(134, 137, 180, 31, 382, 479)
mturk$end_response[suspect_rows] # no 'good' comments
